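# Number of patients: 1007
# After sorting by grade, clinical outcome, age, sex, height, weight and pregnancy status, each fold's data is extracted in turn and cut into 3 s segments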
import os
import shutil
from patient_information import find_patient_files,load_patient_data,get_grade,get_murmur
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from python_speech_features import logfbank
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import wave
import librosa.display
import librosa
import soundfile
from spafe.features.gfcc import erb_spectrogram
from spafe.utils.vis import show_spectrogram
from spafe.utils.preprocessing import SlidingWindow
from data_split_kfold import cut_copy_files

# Spreadsheet read below for its 'Patient ID' and 'fold' columns.
excel_path ='/home/dsp610/HZH/2022_challenge_new/the-circor-digiscope-phonocardiogram-dataset-1.0.3/5_fold.xlsx'
# Directory handed to cut_copy_files as the source of each patient's data.
data_directory = "/home/dsp610/HZH/2022_challenge_new/the-circor-digiscope-phonocardiogram-dataset-1.0.3/training_data"
# Root folder under which the per-fold output folders are created.
out_directory="5fold_cut"

# Create the five fold folders ("1_fold" .. "5_fold") that hold the 3 s data.
# exist_ok=True keeps re-runs safe without a separate (race-prone) existence
# check, and fails loudly if a non-directory already occupies the path.
for i in range(5):
    kfold_out_directory = os.path.join(out_directory, str(i + 1) + "_fold")
    os.makedirs(kfold_out_directory, exist_ok=True)

df = pd.read_excel(excel_path)

# Walk the two columns in lockstep instead of indexing by a positional
# counter, so the loop does not depend on the frame having a 0..n-1 index.
for patient_ID, fold_number in zip(df['Patient ID'], df['fold']):
    patient_ID = str(patient_ID)
    # NOTE(review): cut_copy_files comes from data_split_kfold; presumably it
    # cuts the patient's recordings into 3 s clips and writes them into the
    # given fold folder -- confirm against that module.
    cut_copy_files(
        data_directory,
        patient_ID,
        os.path.join(out_directory, str(fold_number) + "_fold"),
    )
print(df)